import re

from DrissionPage import ChromiumPage

# Read every line of res/book.md into ``lines``; each entry keeps its
# trailing newline, so consumers strip it themselves.
with open('res/book.md', encoding='utf-8') as book_file:
    lines = list(book_file)


def extract_http_links(text):
    """Return every HTTP/HTTPS URL found in *text*, in order of appearance.

    A match starts at ``http://`` or ``https://`` and extends over the
    longest run of characters allowed by the pattern; it stops at the first
    character outside that set (whitespace, for instance). An empty list is
    returned when nothing matches.
    """
    # The pattern contains only non-capturing groups, so findall yields the
    # full matched URL strings rather than group tuples.
    return re.findall(
        r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
        text,
    )


def get_string_after_char(text, char):
    """Return the part of *text* that follows the first occurrence of *char*.

    *char* may be a multi-character substring. When it does not occur in
    *text*, an empty string is returned.
    """
    # str.partition reports a missing separator as an empty middle element.
    _before, separator, remainder = text.partition(char)
    return remainder if separator != "" else ""


# For each line that mentions a Baidu Cloud link, print the line, then print
# and open every URL found on it.
for line in lines:
    # Lines without the link marker are ignored entirely.
    if '百度云链接' not in line:
        continue

    text = line.strip()
    print(text)

    # Everything after the "提取码：" label is taken as the extraction code;
    # it is "" when the label does not appear on the line.
    code = get_string_after_char(text, '提取码：')
    http_links = extract_http_links(text)

    for link in http_links:
        print(link, code)

        # A page object is requested for each link and navigated to it.
        page = ChromiumPage()
        page.get(link)
